Definitions:

library(tidyverse)
library(kableExtra)
library(scales)
library(plotly)
library(glue)
library(here)
library(viridis)
library(ggrepel)

all_plots <- FALSE

m <- list(
    l = 50,
    r = 50,
    b = 200,
    t = 150,
    pad = 0.5
)


theme_c <- function(...){ 
   # font <- "Helvetica"   #assign font family up front
    theme_bw() %+replace%    #replace elements we want to change
    
    theme(
      
      
      #text elements
      plot.title = element_text(             #title
                   size = 14,                #set font size
                   face = 'bold',            #bold typeface
                   hjust = .5,
                   vjust = 3),               
      
      plot.subtitle = element_text(          #subtitle
                   size = 12,
                   hjust = .5,
                   face = 'italic',
                   vjust = 3),               #font size
      
      axis.title = element_text(             #axis titles
                   size =14),               #font size
      
      axis.text.x = element_text(              #axis text
                   size = 12),
      legend.text = element_text(size = 10),
      legend.title = element_text(size = 11, face="bold"),
      # t, r, b, l
      plot.margin = unit(c(1,.5,.5,.5), "cm"),
      legend.position = "right",
      strip.text.x = element_text(size = 18, face = "bold", color="white"),
       strip.text.y = element_text(size = 18, face = "bold", color="white"),
      strip.background = element_rect(fill = "#3E3D3D")
      ) %+replace%
      theme(...)
   
}
# load companies file of EIN to name and endowment data

companies_to_ein <- readRDS(here("data", "companies.RDS")) 


endowment_data <- read_rds(here("data", 
                                "endowments_by_most_recent_filings.RDS")) %>%
  select(-c(EndowmentsHeldUnrelatedOrgInd, EndowmentsHeldRelatedOrgInd)) %>%
  pivot_longer(-c(EIN, fiscal_year),
               names_to = "variable_name") %>%
  left_join(companies_to_ein) %>%
  mutate(fiscal_year=as.numeric(paste(fiscal_year)),
         organization_name = ifelse(is.na(organization_name),
                                    EIN, organization_name))
# extract return dates
source(here("GET_VARS.R"))

files <- dir(here("ballet_990_released_20230208"),
              full.names = TRUE)


dates <- map_df(files,
                ~get_df(filename = .x, 
                        variables = c("//Return//ReturnHeader//TaxPeriodEndDt"))) %>%
  mutate(fiscal_year = as.numeric(paste(fiscal_year))) %>%
   filter_ein()

saveRDS(dates, here('data', 'dates.RDS')) 
dates <- readRDS( here('data', 'dates.RDS')) %>%
  select(EIN, TaxPeriodEndDt, fiscal_year) 
 

endowment_data <- endowment_data %>%
  mutate(fiscal_year=as.numeric(paste(fiscal_year))) %>%
  left_join(dates)


endowment_data_wide <- endowment_data %>% 
  pivot_wider(names_from=variable_name,
              values_from=value) 
# function to plot variables of interest against each other
plot_ranks <- function(var1, var2, data) {

  
   plt <- data %>%
    group_by(fiscal_year) %>%
   # arrange(var1) %>%
    mutate("{var1}_rank" := rank(-!!sym(var1)), na.last = "keep") %>%
#    arrange(var2) %>%
    mutate("{var2}_rank"  := rank(-!!sym(var2)),  na.last = "keep") %>%
    ggplot(aes(x = !!sym(glue("{var1}_rank" )), y =!!sym(glue("{var2}_rank" )),
               color  = organization_name,
               label =EIN
               )) +
    geom_point() +
    geom_function(fun=function(x)x,color="darkred", alpha = .8) +
    labs(x = paste0(var1, " Rank"),
         y =  paste0(var2, " Rank")) +
    theme_bw() +
    labs(title = glue("Rank of {var2} vs. Rank of {var1}")) +
    viridis::scale_color_viridis(discrete=TRUE,
                                 option = "rocket",
                                 end = .9) +
     facet_wrap(~fiscal_year)+
      theme(plot.title = element_text(size = 14, 
                                      hjust = .5, face="bold",),
            plot.subtitle = element_text(hjust = .5, 
                                         face="italic",
                                         size = 14),
            axis.title = element_text(size = 13, 
                                      face = "bold")) 
  
  ggplotly(plt, margin = m, height = 550) %>%
    partial_bundle()

}


plot_ranks_by_consistency <- function(var1, var2, data) {

  
   plt <- data %>%
     filter(fiscal_year > 2010 & fiscal_year < 2021) %>%
    group_by(fiscal_year) %>%
   # arrange(var1) %>%
    mutate("{var1}_rank" := rank(-!!sym(var1)), na.last = "keep") %>%
#    arrange(var2) %>%
    mutate("{var2}_rank"  := rank(-!!sym(var2)),  na.last = "keep")  %>%
     mutate(rank_diff = !!sym(glue("{var2}_rank")) - !!sym(glue("{var1}_rank" ))) %>%
   group_by(EIN) %>%
   mutate(sum_pos = sum(rank_diff >0, na.rm=TRUE),
          sum_neg = sum(rank_diff < 0,  na.rm=TRUE),
          sum_zero = sum(rank_diff ==0, na.rm=TRUE))%>%
     ungroup() %>% 
   mutate(prop_positive = sum_pos / (sum_pos + sum_neg + sum_zero)) %>%
    ggplot(aes(x = !!sym(glue("{var1}_rank" )), y =!!sym(glue("{var2}_rank" )),
               color  = prop_positive,
               group =organization_name
               )) +
    geom_function(fun=function(x)x,color="darkred", alpha = .8, n =201) +
    geom_point() +
    labs(x = paste0(var1, " Rank"),
         y =  paste0(var2, " Rank"),
         title = glue("Rank of {var2} vs. Rank of {var1}"),
         color = glue("Proportion of Years\nWhere {var1}\nRanked Higher than\n{var2}")) +
    theme_c(legend.text=element_text(size =8)) +
    scale_color_gradient2(high="#5935CF", low="#D18E01", mid="#E2E2E2", limits=c(0,1), midpoint=.5) +
     facet_wrap(~fiscal_year, ncol=5)+
      theme(plot.title = element_text(size = 12, 
                                      hjust = .5, face="bold",),
            plot.subtitle = element_text(hjust = .5, 
                                         face="italic",
                                         size = 14),
            axis.title = element_text(size = 13, 
                                      face = "bold")) +
     scale_x_reverse() +
     scale_y_reverse()
  
  return(plt)

}
vars <-  unique(endowment_data$variable_name)[!grepl("EOY|Admin|Grants", unique(endowment_data$variable_name))]

# pairwise combinations of variables
variable_combinations <- t(combn(vars, 2)) %>%
  as.data.frame()

if (!all_plots) variable_combinations <- variable_combinations[1:4,]

Where Endowments are Held

#######################################
# CREATE BIG PALETTE WITH MANY COLORS
#######################################

pal1 <- viridis_pal(option="mako", end = .8)(12)
pal2 <- viridis_pal(option="rocket", end = .8)(12)
pal3 <- viridis_pal(option="magma", end = .8)(12)
pal4 <- viridis_pal(option="inferno", end = .8)(12)

pal <- c(pal1, pal2, pal3, pal4)
indexes <- sample.int(length(pal), replace=FALSE)
pal <- pal[indexes]
###################################################################################################
# table of organization that had endowment in single category for all years on file
###################################################################################################
 endowment_data_wide %>%
  select(contains("EOY"),fiscal_year, 
         EIN, organization_name) %>%
   group_by(organization_name) %>%
   summarize(across(contains("EOY"),~ mean(.x, na.rm=TRUE))) %>%
   filter(if_any(contains("EOY"), ~.x==1 )) %>%
   pivot_longer(contains("EOY")) %>%
  mutate(name = case_when(
    name == "TermEndowmentBalanceEOYPct" ~ "Temporarily restricted endowment",
    name == "PrmnntEndowmentBalanceEOYPct" ~ "Permanent endowment",
    name == "BoardDesignatedBalanceEOYPct" ~ "Board designated or quasi-endowment"
  )) %>%
   filter(!is.na(value) & value !=0) %>%
   group_by(name) %>%
   summarize(`Organization Name` = paste0(organization_name, collapse = "\n")) %>%
   select(`Endowment Type` = name, 
          `Organization Name`) %>%
   kbl() %>%
   row_spec(row=0, color="white", background="#3E3D3D")
Endowment Type Organization Name
Board designated or quasi-endowment Ballet Quad Cities Canyon Concert Ballet The Tallahassee Ballet
Permanent endowment American Repertory Ballet Aspen Santa Fe Ballet Ballet West BalletMet Colorado Ballet Dance Theatre of Harlem Madison Ballet New Mexico Ballet Company Oregon Ballet Theatre Orlando Ballet Pittsburgh Ballet Theatre
Temporarily restricted endowment Ballet Des Moines First State Ballet Theatre
###################################################################################################
# split labels into 2 groups, one labeled on earliest date, one on latest
###################################################################################################
set.seed(999)
all_eins <- unique(endowment_data_wide$EIN)
all_eins <- endowment_data_wide %>% 
  # arrange(desc(BeginningYearBalanceAmt)) %>%
  # select(EIN, organization_name) %>%
  # distinct() %>%
  pull(EIN) %>%
  unique()

indexes_all <- 1:length(all_eins)
first <- sample.int(floor(length(all_eins)/2), replace=FALSE)
ein_beginning <- all_eins[first]
ein_end <- all_eins[indexes_all[!indexes_all %in% first]]
ein_end = all_eins[indexes_all]


ranks <- endowment_data_wide %>%
  filter(fiscal_year <= 2020) %>%
  group_by(organization_name) %>%
  summarize(fiscal_year = max(fiscal_year),
            BeginningYearBalanceAmt = BeginningYearBalanceAmt[which.max(fiscal_year)]) %>%
  mutate(rank = rank(-BeginningYearBalanceAmt)) 

top_20 <- ranks %>% filter(rank <= 20)
not_top_20 <- ranks %>% filter(rank > 20)

  

dat <- endowment_data_wide %>%
  select(contains("EOY"),fiscal_year, 
         EIN, organization_name) %>%
  mutate(rank_category = ifelse(organization_name %in% top_20$organization_name,
                                "Endowment Ranked in Top 20", 
                                "Endowment Not Ranked in Top 20")) %>%
  pivot_longer(cols = contains("EOY"))  %>%
  mutate(name = case_when(
    name == "TermEndowmentBalanceEOYPct" ~ "Temporarily restricted endowment",
    name == "PrmnntEndowmentBalanceEOYPct" ~ "Permanent endowment",
    name == "BoardDesignatedBalanceEOYPct" ~ "Board designated or quasi-endowment"
  )) %>%
  filter(!is.na(value)) %>%
  group_by(organization_name, name) %>%
  mutate(xlabel = ifelse(EIN %in% ein_beginning,
                         min(fiscal_year),
                         max(fiscal_year)),
         value=100*value,
         ylabel = ifelse(EIN %in% ein_beginning,
                         value[which.min(fiscal_year)],
                         value[which.max(fiscal_year)]) )

dat_labels <- dat %>%
  select(organization_name,xlabel,ylabel,name,rank_category) %>%
  distinct()


 dat %>%
   ggplot(aes(x=fiscal_year,
             y  = value, 
             color = organization_name)) +
  geom_line(show.legend=FALSE,
            alpha = .95) +
  facet_wrap(~name) +
  theme_c(strip.text = element_text(margin =margin(3,0,25,0),
                                    size =4,
                                    lineheight=.5),
          strip.text.y = element_text(angle=0, color="white", size =18),
          legend.position="none") +
  scale_color_manual(values=pal) +
   geom_point(size=.5, alpha =.5) +
  labs(y="Percentage of Endowment in Category",
       x = "Fiscal Year") +
  geom_label_repel(aes(label = organization_name,
                 y = ylabel,
                 x = xlabel,  
                 color=organization_name),
             size = 2,
             seed=124,
             data=dat_labels,
              min.segment.length=1,
             label.size = 1.2,
             force=.8,
             # direction="y",
             force_pull = 8,
             max.overlaps = 20) +
  geom_label_repel(aes(label = organization_name,
                color=organization_name,
                 y = ylabel, x = xlabel),
              color = "black",
             label.size = NA,
             size = 2,
             seed=124,
             data=dat_labels,
             min.segment.length=1,
             force=.8,
             # direction="y",
             force_pull=8,
             max.overlaps = 20) +
   facet_grid(rank_category~name)

Rankings

# plotlist <- pmap(variable_combinations, ~{ 
#  #plt <- plot_ranks(var1 = .x, var2 = .y, data = endowment_data_wide)
#    plt <- plot_ranks_by_consistency(var1 = .x, var2 = .y, data = endowment_data_wide) %>% partial_bundle()
# 
#  }
# )

plot_ranks_by_consistency("BeginningYearBalanceAmt",
                          "ContributionsAmt",
                          data = endowment_data_wide) +
  labs(title = "Rank of Endowment Beginning of Year Balance versus the Rank of Contributions",
       y = "Rank of Contributions",
       x = "Rank of Beginning of Year Balance")  + 
  theme(axis.text.x = element_text(size =9),
        axis.text.y=  element_text(size =9),
        strip.text=element_text(size = 16, color = "white"),
        plot.title=element_text(hjust = .5, face="bold", size = 18))

plot_ranks_by_consistency("BeginningYearBalanceAmt",
                          "OtherExpendituresAmt",
                           data = endowment_data_wide)+
  labs(title = "Rank of Beginning of Year Balance versus the Rank of Other Expenditures",
       y = "Rank of Other Expenditures",
       x = "Rank of Beginning of Year Balance") + 
  theme(axis.text.x = element_text(size =9),
        axis.text.y=  element_text(size =9),
        strip.text=element_text(size = 16, color = "white"),
        plot.title=element_text(hjust = .5, face="bold", size = 18))

How ranks change over time

# 
# 
# rankings_over_time <- endowment_data_wide %>%
#   filter(fiscal_year >=2011 & fiscal_year <=2020) %>%
#   group_by(fiscal_year) %>%
#   mutate(rank = rank(-BeginningYearBalanceAmt, na.last = "keep")) %>%
#   filter(!is.na(rank)) %>%
#   ggplot(aes(x=fiscal_year, y = rank, color =organization_name)) +
#   geom_line() +
#   scale_color_viridis(option = "rocket", discrete=TRUE) +
#   theme_c()
# 
# ggplotly(rankings_over_time, margin = m, height = 550) 


plot_ranks_over_time <- function(dat,var) {
  plt <- dat %>%
    filter(fiscal_year >=2011 & fiscal_year <=2020) %>%
    group_by(fiscal_year) %>%
    mutate(rank = rank(-!!sym(var), na.last = "keep")) %>%
    filter(!is.na(rank)) %>%
    ggplot(aes(x=fiscal_year, y = rank, color =organization_name)) +
    geom_line() +
    geom_point(alpha = .3, size =.5) +
    scale_color_viridis(option = "rocket", discrete=TRUE) +
    theme_c() +
    scale_y_continuous(n.breaks = 10) +
    scale_x_continuous(breaks=2011:2020) +
    labs(y = paste("Rank of ", var),
         x = "Fiscal Year",
         title = paste0("Rank over time for ", var))
  ggplotly(plt, margin = m, height = 550)
}

plotlist <- map(vars,~ {
  plt<- plot_ranks_over_time(endowment_data_wide,.x) %>% partial_bundle()
})


htmltools::tagList(setNames(plotlist, NULL))